Random forest exploratory analysis

Load the data and metadata. In this analysis, I will be using the cleaned RPKM data.

Load libraries:

library(kernlab)
## Warning: package 'kernlab' was built under R version 3.0.2
library(cvTools)
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.0.2
## Loading required package: robustbase
library(randomForest)
## randomForest 4.6-7
## Type rfNews() to see new features/changes/bug fixes.
library(edgeR)
## Loading required package: limma
library(caret)
## Warning: package 'caret' was built under R version 3.0.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.0.2
library(plyr)
## Warning: package 'plyr' was built under R version 3.0.2
library(limma)
library(VennDiagram)
## Warning: package 'VennDiagram' was built under R version 3.0.2
## Loading required package: grid
library(xtable)
## Warning: package 'xtable' was built under R version 3.0.2
rDes <- read.delim("../../data/experimental_design_cleaned.txt")
rownames(rDes) <- rDes$TCGA_patient_id
rDat <- read.delim("../../data/aml.rnaseq.gaf2.0_rpkm_cleaned.txt", row.names = 1, 
    check.names = FALSE)

all.results <- list()

1) Functions for cross-validation

# Function to select features using linear models input.dat: training data
# set input.labels: true outcomes for training data
fs.lm <- function(input.dat, input.labels) {
    norm.factor <- calcNormFactors(input.dat)
    design <- model.matrix(~input.labels)
    colnames(design) <- c("Intercept", "Label")
    dat.voomed <- voom(input.dat, design, lib.size = colSums(input.dat) * norm.factor)
    fit <- lmFit(dat.voomed, design)
    ebFit <- eBayes(fit)
    hits <- topTable(ebFit, n = Inf, coef = "Label")
    train.features <- hits$ID[1:25]  #FOR OLDER VERSION OF R
    # train.features <- rownames(hits)[1:25]
    return(train.features)
}
# Function to choose features using correlations to outcomes input.dat:
# training data set input.labels: true outcomes for training data
fs.corr <- function(input.dat, input.levels) {
    gene.corrs <- apply(input.dat, 1, function(x) return(suppressWarnings(cor(x, 
        as.numeric(input.levels), method = "spearman"))))
    gene.corrs <- gene.corrs[order(abs(gene.corrs), na.last = NA, decreasing = TRUE)]
    return(names(gene.corrs[1:25]))
}
# Function to run k-fold cross validation with a random forest all.dat: all
# data used in the analysis all.labels: true outcomes for the data K: number
# of folds to use in CV fs.method: the strategy to use for feature selection
rf.cv <- function(all.dat, all.labels, all.levels, K = 5, fs.method = "lm", 
    plot.varimp = TRUE, conf.mat.flip = FALSE) {
    set.seed(540)
    folds <- cvFolds(ncol(all.dat), K = K)

    conf_matrix <- matrix(0, nrow = 2, ncol = 2, dimnames = list(c("true0", 
        "true1"), c("pred0", "pred1")))
    feature.list <- list()

    for (f in 1:K) {
        train.samples <- folds$subsets[folds$which != f, ]
        test.samples <- folds$subsets[folds$which == f, ]

        if (fs.method == "lm") {
            train.features <- fs.lm(all.dat[, train.samples], all.labels[train.samples])
        } else if (fs.method == "corr") {
            train.features <- fs.corr(all.dat[, train.samples], all.levels[train.samples])
        }
        feature.list[[paste("Fold", f, sep = "")]] <- train.features

        train.dat <- data.frame(class = all.labels[train.samples], t(all.dat[train.features, 
            train.samples]))
        test.dat <- data.frame(t(all.dat[train.features, test.samples]))
        test.labels <- all.labels[test.samples]

        fit.rf <- randomForest(class ~ ., train.dat)
        if (plot.varimp) {
            varImpPlot(fit.rf)
        }

        pred.rf <- predict(fit.rf, newdata = test.dat, type = "response")

        results <- table(factor(test.labels, levels = c(0, 1)), factor(pred.rf, 
            levels = c(0, 1)), dnn = c("obs", "pred"))

        if (conf.mat.flip) {
            conf_matrix[2, 2] <- conf_matrix[2, 2] + results[1, 1]
            conf_matrix[2, 1] <- conf_matrix[2, 1] + results[1, 2]
            conf_matrix[1, 2] <- conf_matrix[1, 2] + results[2, 1]
            conf_matrix[1, 1] <- conf_matrix[1, 1] + results[2, 2]
        } else {
            conf_matrix[1, 1] <- conf_matrix[1, 1] + results[1, 1]
            conf_matrix[1, 2] <- conf_matrix[1, 2] + results[1, 2]
            conf_matrix[2, 1] <- conf_matrix[2, 1] + results[2, 1]
            conf_matrix[2, 2] <- conf_matrix[2, 2] + results[2, 2]
        }
    }

    rf.sens <- conf_matrix[2, 2]/sum(conf_matrix[2, ])
    rf.spec <- conf_matrix[1, 1]/sum(conf_matrix[1, ])
    rf.acc <- (conf_matrix[1, 1] + conf_matrix[2, 2])/sum(conf_matrix)

    return(list(acc = rf.acc, sens = rf.sens, spec = rf.spec, feature.list = feature.list))
}

2) Train RF to predict “Poor” cytogenetic risk, use linear models for feature selection

Set up the data. Remove samples where the cytogenetic risk category is not determined, and set categories to poor and not poor:

rfDes <- rDes[rDes$Cytogenetic_risk != "N.D.", ]
rf.labels <- mapvalues(rfDes$Cytogenetic_risk, c("Good", "Intermediate", "Poor"), 
    c(0, 0, 1), warn_missing = TRUE)
rf.labels <- factor(rf.labels)
rf.levels <- mapvalues(rfDes$Cytogenetic_risk, c("Good", "Intermediate", "Poor"), 
    c(3, 2, 1), warn_missing = TRUE)
rf.levels <- factor(rf.levels)
rfDat <- rDat[, rownames(rfDes)]

Run a 5-fold cross-validation for the data:

cv.risk.res <- rf.cv(rfDat, rf.labels, rf.levels, K = 5, fs.method = "lm")

plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7

cv.risk.res[1:3]
## $acc
## [1] 0.8523
## 
## $sens
## [1] 0.5714
## 
## $spec
## [1] 0.9403
all.results[["lm.poor"]] <- cv.risk.res[1:3]
fts <- cv.risk.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-7


(common.fts.risk <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "SCD|6319_calculated"     "STYXL1|51657_calculated"
## [3] "PDAP1|11333_calculated"  "LUC7L2|51631_calculated"
## [5] "GSTK1|373156_calculated"

3) Predict the different cytogenetic mutations, use linear models for feature selection

First look at trisomy 8:

cv.trisomy8.res <- rf.cv(rDat, factor(as.numeric(rDes$trisomy_8)), rf.levels, 
    K = 5, fs.method = "lm")

plot of chunk unnamed-chunk-8 plot of chunk unnamed-chunk-8 plot of chunk unnamed-chunk-8 plot of chunk unnamed-chunk-8 plot of chunk unnamed-chunk-8

cv.trisomy8.res[1:3]
## $acc
## [1] 0.9553
## 
## $sens
## [1] 0.6842
## 
## $spec
## [1] 0.9875
all.results[["lm.trisomy8"]] <- cv.trisomy8.res[1:3]
fts <- cv.trisomy8.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-8


(common.fts.trisomy8 <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "NEIL2|252969_calculated"   "PPP2R2A|5520_calculated"  
## [3] "ZNF7|7553_calculated"      "KIAA1967|57805_calculated"
## [5] "R3HCC1|203069_calculated"  "TSNARE1|203062_calculated"
## [7] "C8orf55|51337_calculated"  "ZFP41|286128_calculated"

Next look at deletions of chromosome 5:

cv.del5.res <- rf.cv(rDat, factor(as.numeric(rDes$del_5)), rf.levels, K = 5, 
    fs.method = "lm")

plot of chunk unnamed-chunk-9 plot of chunk unnamed-chunk-9 plot of chunk unnamed-chunk-9 plot of chunk unnamed-chunk-9 plot of chunk unnamed-chunk-9

cv.del5.res[1:3]
## $acc
## [1] 0.9721
## 
## $sens
## [1] 0.8125
## 
## $spec
## [1] 0.9877
all.results[["lm.del5"]] <- cv.del5.res[1:3]
fts <- cv.del5.res[[4]]

pdf("../../poster/rf_venn.pdf")
plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)
dev.off()
## pdf 
##   2

(common.fts.del5 <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
##  [1] "KDM3B|51780_calculated"        "EIF4EBP3|8637_calculated"     
##  [3] "PCBD2|84105_calculated"        "KIAA0141|9812|1of2_calculated"
##  [5] "PFDN1|5201_calculated"         "RBM22|55696_calculated"       
##  [7] "ZMAT2|153527_calculated"       "CSNK1A1|1452_calculated"      
##  [9] "PPP2CA|5515_calculated"        "WDR55|54853_calculated"       
## [11] "CATSPER3|347732_calculated"    "HARS|3035_calculated"

Next look at deletions of chromosome 7:

cv.del7.res <- rf.cv(rDat, factor(as.numeric(rDes$del_7)), rf.levels, K = 5, 
    fs.method = "lm")

plot of chunk unnamed-chunk-10 plot of chunk unnamed-chunk-10 plot of chunk unnamed-chunk-10 plot of chunk unnamed-chunk-10 plot of chunk unnamed-chunk-10

cv.del7.res[1:3]
## $acc
## [1] 0.9497
## 
## $sens
## [1] 0.7143
## 
## $spec
## [1] 0.981
all.results[["lm.del7"]] <- cv.del7.res[1:3]
fts <- cv.del7.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-10


(common.fts.del7 <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "LUC7L2|51631_calculated"   "PDAP1|11333_calculated"   
## [3] "MKRN1|23608_calculated"    "SLC25A13|10165_calculated"
## [5] "GATAD1|57798_calculated"   "GSTK1|373156_calculated"  
## [7] "STYXL1|51657_calculated"   "SUMF2|25870_calculated"   
## [9] "CASP2|835_calculated"

Now get a sense of how many genes are shared between these 4 different sets:

fts.all <- list(risk = common.fts.risk, trisomy8 = common.fts.trisomy8, del5 = common.fts.del5, 
    del7 = common.fts.del7)
plot.new()
venn.plot <- venn.diagram(fts.all, filename = NULL, fill = c("red", "yellow", 
    "blue", "green"))
grid.draw(venn.plot)

plot of chunk unnamed-chunk-11

4) Train RF to predict cytogenetic risk, use correlations for feature selection

Run a 5-fold cross-validation for the data:

cv.risk.res <- rf.cv(rfDat, rf.labels, rf.levels, K = 5, fs.method = "corr")

plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12

cv.risk.res[1:3]
## $acc
## [1] 0.8239
## 
## $sens
## [1] 0.381
## 
## $spec
## [1] 0.9627
all.results[["corr.poor"]] <- cv.risk.res[1:3]
fts <- cv.risk.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-12


(common.fts.risk <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "PDE4DIP|9659_calculated"  "PHKA1|5255_calculated"   
## [3] "SDPR|8436_calculated"     "RECK|8434_calculated"    
## [5] "IL7|3574_calculated"      "STARD10|10809_calculated"

5) Predict the different cytogenetic mutations, use correlations for feature selection

First look at trisomy 8:

cv.trisomy8.res <- rf.cv(rDat, factor(as.numeric(rDes$trisomy_8)), factor(as.numeric(rDes$trisomy_8)), 
    K = 5, fs.method = "corr")

plot of chunk unnamed-chunk-13 plot of chunk unnamed-chunk-13 plot of chunk unnamed-chunk-13 plot of chunk unnamed-chunk-13 plot of chunk unnamed-chunk-13

cv.trisomy8.res[1:3]
## $acc
## [1] 0.9609
## 
## $sens
## [1] 0.6842
## 
## $spec
## [1] 0.9938
all.results[["corr.trisomy8"]] <- cv.trisomy8.res[1:3]
fts <- cv.trisomy8.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-13


(common.fts.trisomy8 <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
##  [1] "NEIL2|252969_calculated"   "KIAA1967|57805_calculated"
##  [3] "R3HCC1|203069_calculated"  "DOCK5|80005_calculated"   
##  [5] "BIN3|55909_calculated"     "POLR3D|661_calculated"    
##  [7] "PPP2R2A|5520_calculated"   "TSNARE1|203062_calculated"
##  [9] "ZNF7|7553_calculated"      "HEATR7A|727957_calculated"
## [11] "COMMD5|28991_calculated"   "IKBKB|3551_calculated"

Next look at deletions of chromosome 5:

cv.del5.res <- rf.cv(rDat, factor(as.numeric(rDes$del_5)), factor(as.numeric(rDes$del_5)), 
    K = 5, fs.method = "corr")

plot of chunk unnamed-chunk-14 plot of chunk unnamed-chunk-14 plot of chunk unnamed-chunk-14 plot of chunk unnamed-chunk-14 plot of chunk unnamed-chunk-14

cv.del5.res[1:3]
## $acc
## [1] 0.9385
## 
## $sens
## [1] 0.4375
## 
## $spec
## [1] 0.9877
all.results[["corr.del5"]] <- cv.del5.res[1:3]
fts <- cv.del5.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-14


(common.fts.del5 <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "DSCAM|1826_calculated"

Next look at deletions of chromosome 7:

cv.del7.res <- rf.cv(rDat, factor(as.numeric(rDes$del_7)), factor(as.numeric(rDes$del_7)), 
    K = 5, fs.method = "corr")

plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15

cv.del7.res[1:3]
## $acc
## [1] 0.9497
## 
## $sens
## [1] 0.7143
## 
## $spec
## [1] 0.981
all.results[["corr.del7"]] <- cv.del7.res[1:3]
fts <- cv.del7.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-15


(common.fts.del7 <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "MKRN1|23608_calculated"  "LUC7L2|51631_calculated"
## [3] "GSTK1|373156_calculated" "PDAP1|11333_calculated" 
## [5] "FSCN3|29999_calculated"  "CASP2|835_calculated"   
## [7] "PRKAG2|51422_calculated" "CPSF4|10898_calculated" 
## [9] "ARF5|381_calculated"

Now get a sense of how many genes are shared between these 4 different sets:

fts.all <- list(risk = common.fts.risk, trisomy8 = common.fts.trisomy8, del5 = common.fts.del5, 
    del7 = common.fts.del7)
plot.new()
venn.plot <- venn.diagram(fts.all, filename = NULL, fill = c("red", "yellow", 
    "blue", "green"))
grid.draw(venn.plot)

plot of chunk unnamed-chunk-16

6) Train RF to predict “Good” cytogenetic risk, use linear models for feature selection

Set up the data. Remove samples where the cytogenetic risk category is not determined, and set categories to good and not good:

rfDes <- rDes[rDes$Cytogenetic_risk != "N.D.", ]
rf.labels <- mapvalues(rfDes$Cytogenetic_risk, c("Good", "Intermediate", "Poor"), 
    c(1, 0, 0), warn_missing = TRUE)
rf.labels <- factor(rf.labels)
rf.levels <- mapvalues(rfDes$Cytogenetic_risk, c("Good", "Intermediate", "Poor"), 
    c(3, 2, 1), warn_missing = TRUE)
rf.levels <- factor(rf.levels)
rfDat <- rDat[, rownames(rfDes)]

Run a 5-fold cross-validation for the data:

cv.risk.res <- rf.cv(rfDat, rf.labels, rf.levels, K = 5, fs.method = "lm")

plot of chunk unnamed-chunk-18 plot of chunk unnamed-chunk-18 plot of chunk unnamed-chunk-18 plot of chunk unnamed-chunk-18 plot of chunk unnamed-chunk-18

cv.risk.res[1:3]
## $acc
## [1] 0.9659
## 
## $sens
## [1] 0.8485
## 
## $spec
## [1] 0.993
all.results[["lm.good"]] <- cv.risk.res[1:3]
fts <- cv.risk.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-18


(common.fts.risk <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
##  [1] "CPNE8|144402_calculated"  "HOXA7|3204_calculated"   
##  [3] "HOXA6|3203_calculated"    "HOXA5|3202_calculated"   
##  [5] "HOXA3|3200_calculated"    "HOXA4|3201_calculated"   
##  [7] "HOXA9|3205_calculated"    "HOXA10|3206_calculated"  
##  [9] "HOXA2|3199_calculated"    "FGFR1|2260_calculated"   
## [11] "CYP7B1|9420_calculated"   "PDE4DIP|9659_calculated" 
## [13] "HOXB5|3215_calculated"    "NKX2-3|159296_calculated"
## [15] "LPO|4025_calculated"      "RMND5B|64777_calculated" 
## [17] "PRDM16|63976_calculated"  "HOXB6|3216_calculated"

7) Train RF to predict “Good” cytogenetic risk, use correlations for feature selection

cv.risk.res <- rf.cv(rfDat, rf.labels, rf.levels, K = 5, fs.method = "corr")

plot of chunk unnamed-chunk-19 plot of chunk unnamed-chunk-19 plot of chunk unnamed-chunk-19 plot of chunk unnamed-chunk-19 plot of chunk unnamed-chunk-19

cv.risk.res[1:3]
## $acc
## [1] 0.9602
## 
## $sens
## [1] 0.8485
## 
## $spec
## [1] 0.986
all.results[["corr.good"]] <- cv.risk.res[1:3]
fts <- cv.risk.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-19


(common.fts.risk <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "PDE4DIP|9659_calculated"  "PHKA1|5255_calculated"   
## [3] "SDPR|8436_calculated"     "RECK|8434_calculated"    
## [5] "IL7|3574_calculated"      "STARD10|10809_calculated"

8) Train RF to predict “Intermediate” cytogenetic risk, use linear models for feature selection

Set up the data. Remove samples where the cytogenetic risk category is not determined, and set categories to intermediate and not intermediate:

rfDes <- rDes[rDes$Cytogenetic_risk != "N.D.", ]
rf.labels <- mapvalues(rfDes$Cytogenetic_risk, c("Good", "Intermediate", "Poor"), 
    c(0, 1, 0), warn_missing = TRUE)
rf.labels <- factor(rf.labels)
rf.levels <- mapvalues(rfDes$Cytogenetic_risk, c("Good", "Intermediate", "Poor"), 
    c(3, 2, 1), warn_missing = TRUE)
rf.levels <- factor(rf.levels)
rfDat <- rDat[, rownames(rfDes)]

Run a 5-fold cross-validation for the data:

cv.risk.res <- rf.cv(rfDat, rf.labels, rf.levels, K = 5, fs.method = "lm", conf.mat.flip = TRUE)

plot of chunk unnamed-chunk-21 plot of chunk unnamed-chunk-21 plot of chunk unnamed-chunk-21 plot of chunk unnamed-chunk-21 plot of chunk unnamed-chunk-21

cv.risk.res[1:3]
## $acc
## [1] 0.8636
## 
## $sens
## [1] 0.8133
## 
## $spec
## [1] 0.901
all.results[["lm.intermediate"]] <- cv.risk.res[1:3]
fts <- cv.risk.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-21


(common.fts.risk <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
##  [1] "NAV1|89796_calculated"    "SLC18A2|6571_calculated" 
##  [3] "LASS4|79603_calculated"   "PBX3|5090_calculated"    
##  [5] "HOXB6|3216_calculated"    "HOXB5|3215_calculated"   
##  [7] "NKX2-3|159296_calculated" "IQCE|23288_calculated"   
##  [9] "EVPL|2125_calculated"     "C7orf50|84310_calculated"

9) Train RF to predict “Intermediate” cytogenetic risk, use correlations for feature selection

cv.risk.res <- rf.cv(rfDat, rf.labels, rf.levels, K = 5, fs.method = "corr", 
    conf.mat.flip = TRUE)

plot of chunk unnamed-chunk-22 plot of chunk unnamed-chunk-22 plot of chunk unnamed-chunk-22 plot of chunk unnamed-chunk-22 plot of chunk unnamed-chunk-22

cv.risk.res[1:3]
## $acc
## [1] 0.7784
## 
## $sens
## [1] 0.6133
## 
## $spec
## [1] 0.901
all.results[["corr.intermediate"]] <- cv.risk.res[1:3]
fts <- cv.risk.res[[4]]

plot.new()
venn.plot <- venn.diagram(fts, filename = NULL, fill = c("red", "blue", "green", 
    "yellow", "purple"), margin = 0.1)
grid.draw(venn.plot)

plot of chunk unnamed-chunk-22


(common.fts.risk <- intersect(fts[[1]], intersect(fts[[2]], intersect(fts[[3]], 
    intersect(fts[[4]], fts[[5]])))))
## [1] "PDE4DIP|9659_calculated"  "PHKA1|5255_calculated"   
## [3] "SDPR|8436_calculated"     "RECK|8434_calculated"    
## [5] "IL7|3574_calculated"      "STARD10|10809_calculated"

10) Summarize results

all.results.df <- data.frame(matrix(unlist(all.results), ncol = 3, byrow = TRUE))
rownames(all.results.df) <- names(all.results)
colnames(all.results.df) <- c("accuracy", "sensitivity", "specificity")
all.results.xt <- xtable(all.results.df)
print(all.results.xt, type = "html")
accuracy sensitivity specificity
lm.poor 0.85 0.57 0.94
lm.trisomy8 0.96 0.68 0.99
lm.del5 0.97 0.81 0.99
lm.del7 0.95 0.71 0.98
corr.poor 0.82 0.38 0.96
corr.trisomy8 0.96 0.68 0.99
corr.del5 0.94 0.44 0.99
corr.del7 0.95 0.71 0.98
lm.good 0.97 0.85 0.99
corr.good 0.96 0.85 0.99
lm.intermediate 0.86 0.81 0.90
corr.intermediate 0.78 0.61 0.90